Partners
Suggested timeline:
by Tues, Nov 7 (one week): Finish cleaning data and come up with hypotheses about the data
by Tues, Nov 14 (two weeks): Create exploratory plots to answer those hypotheses/questions
by Tues, Nov 28 (four weeks): Write up findings and clean up graphs
by Tues, Dec 5 (five weeks): Project due
Month of November: Come to TA office hours to get feedback on your work!
Write-as-you-go!!!
https://www.r-bloggers.com/three-tips-for-posting-good-questions-to-r-help-and-stack-overflow/
https://gist.github.com/hadley/270442
https://stackoverflow.com/help/mcve
Email me a draft of your question and I will help you edit it.
a friendlier forum for non-precise questions
textbook, Chapter 11
(readr) http://r4ds.had.co.nz/data-import.html
(lubridate) http://r4ds.had.co.nz/dates-and-times.html
# data from: April 18, 2017
# lines for time series data
library(tidyverse)
df <- read_csv("mortgage.csv")
df <- df %>% gather(key = TYPE, value = RATE, -DATE)
ggplot(df, aes(DATE, RATE, fill = TYPE)) + geom_col() +
ggtitle("U.S. Mortgage Rates", subtitle = "Not recommended!")ggplot(df, aes(DATE, RATE, color = TYPE)) + geom_line() +
ggtitle("U.S. Mortgage Rates")df$TYPE <- factor(df$TYPE,
levels = c("30 YR FIXED", "15 YR FIXED",
"5/1 ARM"))
g <- ggplot(df, aes(DATE, RATE, color = TYPE)) + geom_line() +
ggtitle("U.S. Mortgage Rates") +
labs (x = "", y = "percent") +
theme_grey(16) +
theme(legend.title = element_blank())
glibrary(lubridate)
df2010 <- df %>% filter(year(DATE) == 2010)
ggplot(df2010, aes(DATE, RATE, color = TYPE)) + geom_line() +
ggtitle("U.S. Mortgage Rates")set.seed(5702)
day <- 1:31
number <- 10 * (day - 14)^2 + 2000 + rnorm(1:31, 0, 400)
df <- data.frame(day, number)
ggplot(df, aes(day, number)) +
geom_line(color = "deeppink") +
geom_point(color = "deeppink") +
scale_x_continuous(breaks = 1:31) +
scale_y_continuous(limits = c(0, 5000)) +
ggtitle("Average Motor Vehicle Inspections per Day",
subtitle = "(fake data)") +
labs(x = "day of month", y="number of inspections") +
theme(plot.title = element_text(size = 16))ggplot(df, aes(day, number)) +
geom_col() +
scale_x_continuous(breaks = 1:31) +
ggtitle("Average Motor Vehicle Inspections per Day") +
labs(x = "day of month", y="number of inspections") +
theme(plot.title = element_text(size = 16))dfman <- read_csv("ManchesterByTheSea.csv")
head(dfman)| Day | Date | Gross |
|---|---|---|
| Fri | 2016-11-18 | 72658 |
| Sat | 2016-11-19 | 90040 |
| Sun | 2016-11-20 | 93800 |
| Mon | 2016-11-21 | 32023 |
| Tue | 2016-11-22 | 29989 |
| Wed | 2016-11-23 | 41532 |
g <- ggplot(dfman, aes(Date, Gross)) +
geom_line() +
ggtitle("Manchester by the Sea",
"Daily Gross, United States")
gg + geom_line(color = "green") +
geom_point(color = "green") + theme_halloweenlibrary(lubridate)
g + geom_point(aes(color = factor(wday(Date, label = TRUE))))g <- ggplot(dfman, aes(Date, Gross)) + geom_line() +
ggtitle("Manchester by the Sea",
"Daily Gross, United States")
saturday <- dfman %>% filter(wday(Date) == 7)
g + geom_point(data = saturday, aes(Date, Gross),
color = "deeppink")g + geom_text(data = saturday,
aes(Date, Gross, label = "Sat"),
color = "deeppink", size = 4)g + geom_point(size = 1) +
geom_text(data = saturday,
aes(Date, Gross + 50000, label = "Sat"),
color = "deeppink", size = 4) +
geom_point(data = saturday, aes(Date, Gross),
color = "deeppink", size = 1.5)library(plotly)
# this works
dfman <-read_csv("ManchesterByTheSea.csv")
p <- plot_ly(
dfman, x = ~Date, y = ~Gross,
type = 'scatter',
mode = 'lines+markers',
# Hover text:
hoverinfo = 'text',
text = ~paste(Day)
)
pg + geom_point(data = saturday, aes(Date, Gross),
color = "deeppink") +
geom_smooth(color = "blue", se = FALSE)g <- ggplot(dfman, aes(Date, Gross)) + geom_point()
g + geom_smooth(span = .15, se = FALSE)g + geom_smooth(span = .4, se = FALSE)g + geom_smooth(span = .6, se = FALSE)g + geom_smooth(span = .8, se = FALSE)g + geom_smooth(span = 1, se = FALSE)g + geom_smooth(span = .05)# lines grouped by color
ggplot(dfman, aes(Date, Gross)) +
geom_smooth(color = "black") +
geom_line(aes(color = wday(Date, label = TRUE)),
lwd = 1) +
geom_smooth(color = "black") +
theme(legend.position = "bottom",
legend.title = element_blank())ggplot(dfman, aes(Date, Gross)) +
geom_line(color = "grey30") + geom_point(size = 1) +
facet_grid(.~wday(Date, label = TRUE))ggplot(dfman, aes(Date, Gross)) +
geom_line(color = "grey30") + geom_point(size = 1) +
facet_grid(.~wday(Date, label = TRUE)) +
geom_smooth(se = FALSE)christmas <- dfman %>%
filter(Date >= as.Date("2016-12-20") &
Date <= ("2017-01-03"))
ggplot(christmas, aes(Date, Gross)) +
geom_label(aes(label = wday(Date, label = TRUE))) +
geom_line(color = "cornflowerblue") +
scale_x_date(date_labels = "%b\n%d",
date_breaks = "1 day") +
ggtitle("Chistmas Week")ggplot(christmas, aes(Date, Gross/1000000)) +
geom_line(color = "cornflowerblue", lwd = 1.1) +
geom_point(color = "cornflowerblue", size = 2) +
geom_label(data = christmas,
aes(x = Date, y = Gross/1000000 + .06,
label = day(Date))) +
scale_x_date(date_labels = "%a",
date_breaks = "1 day") +
ggtitle("Manchester by the Sea",
"Chistmas Week Box Office Gross") +
labs(x = "Dec 2016 - Jan 2017",
y = "Daily Gross (in millions $US)") +
theme_grey(14)# annotate Christmas Week
start <- as.Date("2016-12-24")
end <- as.Date("2017-01-02")
g + annotate("rect", xmin = start, xmax = end,
ymin = -Inf, ymax = Inf, fill = "green",
alpha = .2) +
annotate("text", x = end + 2,
y = 1500000, label = "Dec 24 - Jan 2",
color = "green", hjust = 0) +
theme_classic()# with weekly summary
weekly <- dfman %>%
group_by(Year = year(Date),
Week = week(Date)) %>%
summarize(AvgWeeklyGross = mean(Gross)) %>%
mutate(Date = as.Date("2015-12-27") +
365*(Year - 2016) +
7*(Week -1))# first attempt
ggplot(dfman, aes(Date, Gross)) +
geom_line() +
geom_line(data = weekly,
aes(Date, AvgWeeklyGross),
color = "blue") +
geom_smooth(color = "deeppink")# cleaned up
ggplot(dfman, aes(Date, Gross/1000000)) +
geom_line(color = "grey30") +
geom_line(data = weekly,
aes(Date, AvgWeeklyGross/1000000),
color = "blue", lwd = 1.5) +
geom_smooth(color = "deeppink", lwd = 1.5,
se = FALSE) +
annotate("text", x = as.Date("2017-02-15"),
y = 1.65, label = "average weekly gross",
color = "blue", hjust = 0) +
annotate("segment", x = as.Date("2017-02-01"),
xend = as.Date("2017-02-12"), y = 1.65,
yend = 1.65, color = "blue", lwd = 1.5) +
annotate("text", x = as.Date("2017-02-15"),
y = 1.5, label = "geom_smooth()",
color = "deeppink", hjust = 0) +
annotate("segment", x = as.Date("2017-02-01"),
xend = as.Date("2017-02-12"), y = 1.5,
yend = 1.5, color = "deeppink", lwd = 1.5) +
scale_x_date(date_labels = "%b\n%Y") +
ylab("Daily Box Office Gross \n (in millions US$)") +
xlab("") +
ggtitle("Manchester by the Sea",
"Daily Gross, United States") +
theme_bw(16)# weekends
weekends <- dfman %>%
mutate(daytype = ifelse(wday(Date) %in% c(6, 7, 1),
"Fri-Sun", "Mon-Thurs"))
g <- ggplot(weekends, aes(Date, Gross)) +
geom_line(aes(color = daytype)) +
theme(legend.title = element_blank())
g